# Launch a vLLM API server via the `vllm serve` subcommand.
#
# Positional argument:
#   model_path                 model to load
#                              NOTE(review): `model_path` and `model_name` look
#                              like template placeholders -- confirm/replace.
# Identity / network:
#   --served-model-name        name passed for the served model (model_name)
#   --host / --port            listen address 0.0.0.0, port 6537
#                              NOTE(review): 0.0.0.0 with no --api-key visible
#                              here -- confirm access is restricted elsewhere.
# GPU layout / memory (semantics per the vLLM engine-argument docs):
#   --tensor-parallel-size     8
#   --gpu-memory-utilization   0.9
# Sequence limits (both set to the same value, 32768):
#   --max-model-len            32768
#   --max-seq-len-to-capture   32768
# Feature toggles:
#   --enable-prefix-caching
#   --trust-remote-code
#
# Options are written in --flag=value form and grouped by concern; the set of
# options and their values is unchanged.
vllm serve model_path \
    --served-model-name=model_name \
    --host=0.0.0.0 \
    --port=6537 \
    --tensor-parallel-size=8 \
    --gpu-memory-utilization=0.9 \
    --max-model-len=32768 \
    --max-seq-len-to-capture=32768 \
    --enable-prefix-caching \
    --trust-remote-code